"""passlib.pwd -- password generation helpers"""
#=============================================================================
# imports
#=============================================================================
from __future__ import absolute_import, division, print_function, unicode_literals
# core
import codecs
from collections import defaultdict
try:
    from collections.abc import MutableMapping
except ImportError:
    # py2 compat
    from collections import MutableMapping
from math import ceil, log as logf
import logging; log = logging.getLogger(__name__)
import pkg_resources
import os
# site
# pkg
from passlib import exc
from passlib.utils.compat import PY2, irange, itervalues, int_types
from passlib.utils import rng, getrandstr, to_unicode
from passlib.utils.decor import memoized_property
# local
__all__ = [
    "genword", "default_charsets",
    "genphrase", "default_wordsets",
]

#=============================================================================
# constants
#=============================================================================

# XXX: rename / publically document this map?
entropy_aliases = dict(
    # barest protection from throttled online attack
    unsafe=12,

    # some protection from unthrottled online attack
    weak=24,

    # some protection from offline attacks
    fair=36,

    # reasonable protection from offline attacks
    strong=48,

    # very good protection from offline attacks
    secure=60,
)

#=============================================================================
# internal helpers
#=============================================================================

def _superclasses(obj, cls):
    """return remaining classes in object's MRO after cls"""
    mro = type(obj).__mro__
    return mro[mro.index(cls)+1:]


def _self_info_rate(source):
    """
    returns 'rate of self-information' --
    i.e. average (per-symbol) entropy of the sequence **source**,
    where probability of a given symbol occurring is calculated based on
    the number of occurrences within the sequence itself.

    if all elements of the source are unique, this should equal ``log(len(source), 2)``.

    :arg source:
        iterable containing 0+ symbols
        (e.g. list of strings or ints, string of characters, etc).

    :returns:
        float bits of entropy
    """
    try:
        size = len(source)
    except TypeError:
        # if len() doesn't work, calculate size by summing counts later
        size = None
    counts = defaultdict(int)
    for char in source:
        counts[char] += 1
    if size is None:
        values = counts.values()
        size = sum(values)
    else:
        values = itervalues(counts)
    if not size:
        return 0
    # NOTE: the following performs ``- sum(value / size * logf(value / size, 2) for value in values)``,
    #       it just does so with as much pulled out of the sum() loop as possible...
    return logf(size, 2) - sum(value * logf(value, 2) for value in values) / size


# def _total_self_info(source):
#     """
#     return total self-entropy of a sequence
#     (the average entropy per symbol * size of sequence)
#     """
#     return _self_info_rate(source) * len(source)


def _open_asset_path(path, encoding=None):
    """
    :param asset_path:
        string containing absolute path to file,
        or package-relative path using format
        ``"python.module:relative/file/path"``.

    :returns:
        filehandle opened in 'rb' mode
        (unless encoding explicitly specified)
    """
    if encoding:
        return codecs.getreader(encoding)(_open_asset_path(path))
    if os.path.isabs(path):
        return open(path, "rb")
    package, sep, subpath = path.partition(":")
    if not sep:
        raise ValueError("asset path must be absolute file path "
                         "or use 'pkg.name:sub/path' format: %r" % (path,))
    return pkg_resources.resource_stream(package, subpath)


#: type aliases
_sequence_types = (list, tuple)
_set_types = (set, frozenset)

#: set of elements that ensure_unique() has validated already.
_ensure_unique_cache = set()


def _ensure_unique(source, param="source"):
    """
    helper for generators --
    Throws ValueError if source elements aren't unique.
    Error message will display (abbreviated) repr of the duplicates in a string/list
    """
    # check cache to speed things up for frozensets / tuples / strings
    cache = _ensure_unique_cache
    hashable = True
    try:
        if source in cache:
            return True
    except TypeError:
        hashable = False

    # check if it has dup elements
    if isinstance(source, _set_types) or len(set(source)) == len(source):
        if hashable:
            try:
                cache.add(source)
            except TypeError:
                # XXX: under pypy, "list() in set()" above doesn't throw TypeError,
                #      but trying to add unhashable it to a set *does*.
                pass
        return True

    # build list of duplicate values
    seen = set()
    dups = set()
    for elem in source:
        (dups if elem in seen else seen).add(elem)
    dups = sorted(dups)
    trunc = 8
    if len(dups) > trunc:
        trunc = 5
    dup_repr = ", ".join(repr(str(word)) for word in dups[:trunc])
    if len(dups) > trunc:
        dup_repr += ", ... plus %d others" % (len(dups) - trunc)

    # throw error
    raise ValueError("`%s` cannot contain duplicate elements: %s" %
                     (param, dup_repr))

#=============================================================================
# base generator class
#=============================================================================
class SequenceGenerator(object):
    """
    Base class used by word & phrase generators.

    These objects take a series of options, corresponding
    to those of the :func:`generate` function.
    They act as callables which can be used to generate a password
    or a list of 1+ passwords. They also expose some read-only
    informational attributes.

    Parameters
    ----------
    :param entropy:
        Optionally specify the amount of entropy the resulting passwords
        should contain (as measured with respect to the generator itself).
        This will be used to auto-calculate the required password size.

    :param length:
        Optionally specify the length of password to generate,
        measured as count of whatever symbols the subclass uses (characters or words).
        Note if ``entropy`` requires a larger minimum length,
        that will be used instead.

    :param rng:
        Optionally provide a custom RNG source to use.
        Should be an instance of :class:`random.Random`,
        defaults to :class:`random.SystemRandom`.

    Attributes
    ----------
    .. autoattribute:: length
    .. autoattribute:: symbol_count
    .. autoattribute:: entropy_per_symbol
    .. autoattribute:: entropy

    Subclassing
    -----------
    Subclasses must implement the ``.__next__()`` method,
    and set ``.symbol_count`` before calling base ``__init__`` method.
    """
    #=============================================================================
    # instance attrs
    #=============================================================================

    #: requested size of final password
    length = None

    #: requested entropy of final password
    requested_entropy = "strong"

    #: random number source to use
    rng = rng

    #: number of potential symbols (must be filled in by subclass)
    symbol_count = None

    #=============================================================================
    # init
    #=============================================================================
    def __init__(self, entropy=None, length=None, rng=None, **kwds):

        # make sure subclass set things up correctly
        assert self.symbol_count is not None, "subclass must set .symbol_count"

        # init length & requested entropy
        if entropy is not None or length is None:
            if entropy is None:
                entropy = self.requested_entropy
            entropy = entropy_aliases.get(entropy, entropy)
            if entropy <= 0:
                raise ValueError("`entropy` must be positive number")
            min_length = int(ceil(entropy / self.entropy_per_symbol))
            if length is None or length < min_length:
                length = min_length

        self.requested_entropy = entropy

        if length < 1:
            raise ValueError("`length` must be positive integer")
        self.length = length

        # init other common options
        if rng is not None:
            self.rng = rng

        # hand off to parent
        if kwds and _superclasses(self, SequenceGenerator) == (object,):
            raise TypeError("Unexpected keyword(s): %s" % ", ".join(kwds.keys()))
        super(SequenceGenerator, self).__init__(**kwds)

    #=============================================================================
    # informational helpers
    #=============================================================================

    @memoized_property
    def entropy_per_symbol(self):
        """
        Average entropy per symbol (assuming all symbols have equal probability)
        """
        return logf(self.symbol_count, 2)

    @memoized_property
    def entropy(self):
        """
        Effective entropy of generated passwords.

        This value will always be a multiple of :attr:`entropy_per_symbol`.
        If entropy is specified in constructor, :attr:`length` will be chosen so
        so that this value is the smallest multiple >= :attr:`requested_entropy`.
        """
        return self.length * self.entropy_per_symbol

    #=============================================================================
    # generation
    #=============================================================================
    def __next__(self):
        """main generation function, should create one password/phrase"""
        raise NotImplementedError("implement in subclass")

    def __call__(self, returns=None):
        """
        frontend used by genword() / genphrase() to create passwords
        """
        if returns is None:
            return next(self)
        elif isinstance(returns, int_types):
            return [next(self) for _ in irange(returns)]
        elif returns is iter:
            return self
        else:
            raise exc.ExpectedTypeError(returns, "<None>, int, or <iter>", "returns")

    def __iter__(self):
        return self

    if PY2:
        def next(self):
            return self.__next__()

    #=============================================================================
    # eoc
    #=============================================================================

#=============================================================================
# default charsets
#=============================================================================

#: global dict of predefined characters sets
default_charsets = dict(
    # ascii letters, digits, and some punctuation
    ascii_72='0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!@#$%^&*?/',

    # ascii letters and digits
    ascii_62='0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',

    # ascii_50, without visually similar '1IiLl', '0Oo', '5S', '8B'
    ascii_50='234679abcdefghjkmnpqrstuvwxyzACDEFGHJKMNPQRTUVWXYZ',

    # lower case hexadecimal
    hex='0123456789abcdef',
)

#=============================================================================
# password generator
#=============================================================================

class WordGenerator(SequenceGenerator):
    """
    Class which generates passwords by randomly choosing from a string of unique characters.

    Parameters
    ----------
    :param chars:
        custom character string to draw from.

    :param charset:
        predefined charset to draw from.

    :param \*\*kwds:
        all other keywords passed to the :class:`SequenceGenerator` parent class.

    Attributes
    ----------
    .. autoattribute:: chars
    .. autoattribute:: charset
    .. autoattribute:: default_charsets
    """
    #=============================================================================
    # instance attrs
    #=============================================================================

    #: Predefined character set in use (set to None for instances using custom 'chars')
    charset = "ascii_62"

    #: string of chars to draw from -- usually filled in from charset
    chars = None

    #=============================================================================
    # init
    #=============================================================================
    def __init__(self, chars=None, charset=None, **kwds):

        # init chars and charset
        if chars:
            if charset:
                raise TypeError("`chars` and `charset` are mutually exclusive")
        else:
            if not charset:
                charset = self.charset
                assert charset
            chars = default_charsets[charset]
        self.charset = charset
        chars = to_unicode(chars, param="chars")
        _ensure_unique(chars, param="chars")
        self.chars = chars

        # hand off to parent
        super(WordGenerator, self).__init__(**kwds)
        # log.debug("WordGenerator(): entropy/char=%r", self.entropy_per_symbol)

    #=============================================================================
    # informational helpers
    #=============================================================================

    @memoized_property
    def symbol_count(self):
        return len(self.chars)

    #=============================================================================
    # generation
    #=============================================================================

    def __next__(self):
        # XXX: could do things like optionally ensure certain character groups
        #      (e.g. letters & punctuation) are included
        return getrandstr(self.rng, self.chars, self.length)

    #=============================================================================
    # eoc
    #=============================================================================


def genword(entropy=None, length=None, returns=None, **kwds):
    """Generate one or more random passwords.

    This function uses :mod:`random.SystemRandom` to generate
    one or more passwords using various character sets.
    The complexity of the password can be specified
    by size, or by the desired amount of entropy.

    Usage Example::

        >>> # generate a random alphanumeric string with 48 bits of entropy (the default)
        >>> from passlib import pwd
        >>> pwd.genword()
        'DnBHvDjMK6'

        >>> # generate a random hexadecimal string with 52 bits of entropy
        >>> pwd.genword(entropy=52, charset="hex")
        '310f1a7ac793f'

    :param entropy:
        Strength of resulting password, measured in 'guessing entropy' bits.
        An appropriate **length** value will be calculated
        based on the requested entropy amount, and the size of the character set.

        This can be a positive integer, or one of the following preset
        strings: ``"weak"`` (24), ``"fair"`` (36),
        ``"strong"`` (48), and ``"secure"`` (56).

        If neither this or **length** is specified, **entropy** will default
        to ``"strong"`` (48).

    :param length:
        Size of resulting password, measured in characters.
        If omitted, the size is auto-calculated based on the **entropy** parameter.

        If both **entropy** and **length** are specified,
        the stronger value will be used.

    :param returns:
        Controls what this function returns:

        * If ``None`` (the default), this function will generate a single password.
        * If an integer, this function will return a list containing that many passwords.
        * If the ``iter`` constant, will return an iterator that yields passwords.

    :param chars:

        Optionally specify custom string of characters to use when randomly
        generating a password. This option cannot be combined with **charset**.

    :param charset:

        The predefined character set to draw from (if not specified by **chars**).
        There are currently four presets available:

        * ``"ascii_62"`` (the default) -- all digits and ascii upper & lowercase letters.
          Provides ~5.95 entropy per character.

        * ``"ascii_50"`` -- subset which excludes visually similar characters
          (``1IiLl0Oo5S8B``). Provides ~5.64 entropy per character.

        * ``"ascii_72"`` -- all digits and ascii upper & lowercase letters,
          as well as some punctuation. Provides ~6.17 entropy per character.

        * ``"hex"`` -- Lower case hexadecimal.  Providers 4 bits of entropy per character.

    :returns:
        :class:`!unicode` string containing randomly generated password;
        or list of 1+ passwords if :samp:`returns={int}` is specified.
    """
    gen = WordGenerator(length=length, entropy=entropy, **kwds)
    return gen(returns)

#=============================================================================
# default wordsets
#=============================================================================

def _load_wordset(asset_path):
    """
    load wordset from compressed datafile within package data.
    file should be utf-8 encoded

    :param asset_path:
        string containing  absolute path to wordset file,
        or "python.module:relative/file/path".

    :returns:
        tuple of words, as loaded from specified words file.
    """
    # open resource file, convert to tuple of words (strip blank lines & ws)
    with _open_asset_path(asset_path, "utf-8") as fh:
        gen = (word.strip() for word in fh)
        words = tuple(word for word in gen if word)

    # NOTE: works but not used
    # # detect if file uses "<int> <word>" format, and strip numeric prefix
    # def extract(row):
    #     idx, word = row.replace("\t", " ").split(" ", 1)
    #     if not idx.isdigit():
    #         raise ValueError("row is not dice index + word")
    #     return word
    # try:
    #     extract(words[-1])
    # except ValueError:
    #     pass
    # else:
    #     words = tuple(extract(word) for word in words)

    log.debug("loaded %d-element wordset from %r", len(words), asset_path)
    return words


class WordsetDict(MutableMapping):
    """
    Special mapping used to store dictionary of wordsets.
    Different from a regular dict in that some wordsets
    may be lazy-loaded from an asset path.
    """

    #: dict of key -> asset path
    paths = None

    #: dict of key -> value
    _loaded = None

    def __init__(self, *args, **kwds):
        self.paths = {}
        self._loaded = {}
        super(WordsetDict, self).__init__(*args, **kwds)

    def __getitem__(self, key):
        try:
            return self._loaded[key]
        except KeyError:
            pass
        path = self.paths[key]
        value = self._loaded[key] = _load_wordset(path)
        return value

    def set_path(self, key, path):
        """
        set asset path to lazy-load wordset from.
        """
        self.paths[key] = path

    def __setitem__(self, key, value):
        self._loaded[key] = value

    def __delitem__(self, key):
        if key in self:
            del self._loaded[key]
            self.paths.pop(key, None)
        else:
            del self.paths[key]

    @property
    def _keyset(self):
        keys = set(self._loaded)
        keys.update(self.paths)
        return keys

    def __iter__(self):
        return iter(self._keyset)

    def __len__(self):
        return len(self._keyset)

    # NOTE: speeds things up, and prevents contains from lazy-loading
    def __contains__(self, key):
        return key in self._loaded or key in self.paths


#: dict of predefined word sets.
#: key is name of wordset, value should be sequence of words.
default_wordsets = WordsetDict()

# register the wordsets built into passlib
for name in "eff_long eff_short eff_prefixed bip39".split():
    default_wordsets.set_path(name, "passlib:_data/wordsets/%s.txt" % name)

#=============================================================================
# passphrase generator
#=============================================================================
class PhraseGenerator(SequenceGenerator):
    """class which generates passphrases by randomly choosing
    from a list of unique words.

    :param wordset:
        wordset to draw from.
    :param preset:
        name of preset wordlist to use instead of ``wordset``.
    :param spaces:
        whether to insert spaces between words in output (defaults to ``True``).
    :param \*\*kwds:
        all other keywords passed to the :class:`SequenceGenerator` parent class.

    .. autoattribute:: wordset
    """
    #=============================================================================
    # instance attrs
    #=============================================================================

    #: predefined wordset to use
    wordset = "eff_long"

    #: list of words to draw from
    words = None

    #: separator to use when joining words
    sep = " "

    #=============================================================================
    # init
    #=============================================================================
    def __init__(self, wordset=None, words=None, sep=None, **kwds):

        # load wordset
        if words is not None:
            if wordset is not None:
                raise TypeError("`words` and `wordset` are mutually exclusive")
        else:
            if wordset is None:
                wordset = self.wordset
                assert wordset
            words = default_wordsets[wordset]
        self.wordset = wordset

        # init words
        if not isinstance(words, _sequence_types):
            words = tuple(words)
        _ensure_unique(words, param="words")
        self.words = words

        # init separator
        if sep is None:
            sep = self.sep
        sep = to_unicode(sep, param="sep")
        self.sep = sep

        # hand off to parent
        super(PhraseGenerator, self).__init__(**kwds)
        ##log.debug("PhraseGenerator(): entropy/word=%r entropy/char=%r min_chars=%r",
        ##          self.entropy_per_symbol, self.entropy_per_char, self.min_chars)

    #=============================================================================
    # informational helpers
    #=============================================================================

    @memoized_property
    def symbol_count(self):
        return len(self.words)

    #=============================================================================
    # generation
    #=============================================================================

    def __next__(self):
        words = (self.rng.choice(self.words) for _ in irange(self.length))
        return self.sep.join(words)

    #=============================================================================
    # eoc
    #=============================================================================


def genphrase(entropy=None, length=None, returns=None, **kwds):
    """Generate one or more random password / passphrases.

    This function uses :mod:`random.SystemRandom` to generate
    one or more passwords; it can be configured to generate
    alphanumeric passwords, or full english phrases.
    The complexity of the password can be specified
    by size, or by the desired amount of entropy.

    Usage Example::

        >>> # generate random phrase with 48 bits of entropy
        >>> from passlib import pwd
        >>> pwd.genphrase()
        'gangly robbing salt shove'

        >>> # generate a random phrase with 52 bits of entropy
        >>> # using a particular wordset
        >>> pwd.genword(entropy=52, wordset="bip39")
        'wheat dilemma reward rescue diary'

    :param entropy:
        Strength of resulting password, measured in 'guessing entropy' bits.
        An appropriate **length** value will be calculated
        based on the requested entropy amount, and the size of the word set.

        This can be a positive integer, or one of the following preset
        strings: ``"weak"`` (24), ``"fair"`` (36),
        ``"strong"`` (48), and ``"secure"`` (56).

        If neither this or **length** is specified, **entropy** will default
        to ``"strong"`` (48).

    :param length:
        Length of resulting password, measured in words.
        If omitted, the size is auto-calculated based on the **entropy** parameter.

        If both **entropy** and **length** are specified,
        the stronger value will be used.

    :param returns:
        Controls what this function returns:

        * If ``None`` (the default), this function will generate a single password.
        * If an integer, this function will return a list containing that many passwords.
        * If the ``iter`` builtin, will return an iterator that yields passwords.

    :param words:

        Optionally specifies a list/set of words to use when randomly generating a passphrase.
        This option cannot be combined with **wordset**.

    :param wordset:

        The predefined word set to draw from (if not specified by **words**).
        There are currently four presets available:

        ``"eff_long"`` (the default)

            Wordset containing 7776 english words of ~7 letters.
            Constructed by the EFF, it offers ~12.9 bits of entropy per word.

            This wordset (and the other ``"eff_"`` wordsets)
            were `created by the EFF <https://www.eff.org/deeplinks/2016/07/new-wordlists-random-passphrases>`_
            to aid in generating passwords.  See their announcement page
            for more details about the design & properties of these wordsets.

        ``"eff_short"``

            Wordset containing 1296 english words of ~4.5 letters.
            Constructed by the EFF, it offers ~10.3 bits of entropy per word.

        ``"eff_prefixed"``

            Wordset containing 1296 english words of ~8 letters,
            selected so that they each have a unique 3-character prefix.
            Constructed by the EFF, it offers ~10.3 bits of entropy per word.

        ``"bip39"``

            Wordset of 2048 english words of ~5 letters,
            selected so that they each have a unique 4-character prefix.
            Published as part of Bitcoin's `BIP 39 <https://github.com/bitcoin/bips/blob/master/bip-0039/english.txt>`_,
            this wordset has exactly 11 bits of entropy per word.

            This list offers words that are typically shorter than ``"eff_long"``
            (at the cost of slightly less entropy); and much shorter than
            ``"eff_prefixed"`` (at the cost of a longer unique prefix).

    :param sep:
        Optional separator to use when joining words.
        Defaults to ``" "`` (a space), but can be an empty string, a hyphen, etc.

    :returns:
        :class:`!unicode` string containing randomly generated passphrase;
        or list of 1+ passphrases if :samp:`returns={int}` is specified.
    """
    gen = PhraseGenerator(entropy=entropy, length=length, **kwds)
    return gen(returns)

#=============================================================================
# strength measurement
#
# NOTE:
# for a little while, had rough draft of password strength measurement alg here.
# but not sure if there's value in yet another measurement algorithm,
# that's not just duplicating the effort of libraries like zxcbn.
# may revive it later, but for now, leaving some refs to others out there:
#    * NIST 800-63 has simple alg
#    * zxcvbn (https://tech.dropbox.com/2012/04/zxcvbn-realistic-password-strength-estimation/)
#      might also be good, and has approach similar to composite approach i was already thinking about,
#      but much more well thought out.
#    * passfault (https://github.com/c-a-m/passfault) looks thorough,
#      but may have licensing issues, plus porting to python looks like very big job :(
#    * give a look at running things through zlib - might be able to cheaply
#      catch extra redundancies.
#=============================================================================

#=============================================================================
# eof
#=============================================================================
